Example 2.¶
In [12]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Audio
In [13]:
from rendering.is3.dataloader_numpy import ImpulsiveStationarySeparation
# Sample rate (Hz) handed to the Audio players and to the wavelet baseline below.
sr = 44_100
# Dataset handle, built with its default constructor arguments.
dataset = ImpulsiveStationarySeparation()
In [14]:
# Fetch scene 1500, requesting the "test" subset and the "random" dataset.
scene = dataset.read_scene(scene_index=1500, subset="test", dataset="random")
# Unpack in the order read_scene returns its five values.
bkg, impulse, mix, gain, norm_gain = scene
Background
Impulses
Mix
HPSS with a margin of 1¶
In [15]:
# HPSS baseline (margin = 1): decompose the mix and play both estimates.
from rendering.is3.baselines import hpss

hpss_kwargs = dict(nfft=2048, window_size=2048, overlap=0.75, margin=1.)
hpss_module = hpss.HarmonicPercussiveDecomposition(**hpss_kwargs)

# forward() is unpacked into four values; only the first two are used here.
y_p, y_h, _, _ = hpss_module.forward(mix)

# Print each caption right before its audio player, impulses first.
for caption, track in (
    ("HPSS/Impulses", y_p),
    ("HPSS/Stationary Background", y_h),
):
    print(caption)
    display(Audio(track, rate=sr))
HPSS/Impulses
HPSS/Stationary Background
HPSS with a margin of 2¶
In [16]:
# HPSS baseline (margin = 2): same settings as the margin-1 run except `margin`.
# Relies on `hpss` having been imported by the previous HPSS cell.
hpss_kwargs_2 = dict(nfft=2048, window_size=2048, overlap=0.75, margin=2.)
hpss_module_2 = hpss.HarmonicPercussiveDecomposition(**hpss_kwargs_2)

# forward() is unpacked into four values; only the first two are used here.
y_p_2, y_h_2, _, _ = hpss_module_2.forward(mix)

# Print each caption right before its audio player, impulses first.
for caption, track in (
    ("HPSS/Impulses", y_p_2),
    ("HPSS/Stationary Background", y_h_2),
):
    print(caption)
    display(Audio(track, rate=sr))
HPSS/Impulses
HPSS/Stationary Background
Wavelet filtering¶
In [17]:
from rendering.is3.baselines import wavelet_script

# Wavelet baseline settings, forwarded as keyword arguments to the constructor.
wavelet_kwargs = dict(
    wavelet="db",
    level=13,
    sr=sr,
    ks=2.,
    ks_impulse=6.,
    kc=1.,
    kernel_size=1025,
)
wavelet_module = wavelet_script.WaveletBaseline(**wavelet_kwargs)

# forward() is unpacked as (background, impulse), in that order.
wavelet_bkg, wavelet_impulse = wavelet_module.forward(mix)

# Playback order is impulses first, then background.
for caption, track in (
    ("Wavelet/Impulses", wavelet_impulse),
    ("Wavelet/Stationary Background", wavelet_bkg),
):
    print(caption)
    display(Audio(track, rate=sr))
Wavelet/Impulses
Wavelet/Stationary Background
Proposed system IS³¶
In [18]:
from rendering.is3.model_wrapper import ModelWrapper
import torch

# Build the IS³ model wrapper for configuration "014" (no job id).
model = ModelWrapper(
    conf_name="014",
    job_id=None,
)
# eval() is called for inference; its return value is bound to `_` so the
# cell does not echo it.
_ = model.eval()

# Inference only: run the forward pass with autograd disabled so no
# computation graph is built or kept alive alongside the outputs.
# The mix is reshaped to a single-row batch of shape (1, n_samples).
with torch.no_grad():
    y_i, y_s = model.forward(torch.tensor(mix).reshape(1, -1))

# .detach() is kept so these lines work whether or not the outputs track
# gradients; it is a no-op on tensors produced under no_grad().
print("IS3/Impulses")
display(Audio(y_i[0].detach().numpy(), rate=sr))
print("IS3/Stationary Background")
display(Audio(y_s[0].detach().numpy(), rate=sr))
IS3/Impulses
IS3/Stationary Background
This time, the frequency components of the impulsive track are more spread out, which makes the separation more challenging and leads to a slight reduction of the sound level in the background track at impulse times.
In [19]:
# One stacked panel per impulse estimate, with the ground-truth target on top.
impulse_panels = [
    ('Target Impulse', impulse),
    ('HPSS (margin=1) Impulse', y_p),
    ('HPSS (margin=2) Impulse', y_p_2),
    ('Wavelet Impulse', wavelet_impulse),
    ('IS³ Impulse', y_i[0].detach().numpy()),
]

# Shared x/y axes keep the five waveforms on a common scale.
fig, axs = plt.subplots(5, 1, figsize=(15, 12), sharex=True, sharey=True)
fig.suptitle('Comparison of Impulse Separation Methods')

for ax, (title, waveform) in zip(axs, impulse_panels):
    ax.plot(waveform)
    ax.set_title(title)
    ax.set_ylabel('Amplitude')

# Only the bottom panel carries the x-axis label.
axs[4].set_xlabel('Sample')

plt.tight_layout()
plt.show()
In [20]:
# One stacked panel per background estimate, with the ground-truth target on top.
background_panels = [
    ('Target Background', bkg),
    ('HPSS (margin=1) Background', y_h),
    ('HPSS (margin=2) Background', y_h_2),
    ('Wavelet Background', wavelet_bkg),
    ('IS³ Background', y_s[0].detach().numpy()),
]

# Shared x/y axes keep the five waveforms on a common scale.
fig, axs = plt.subplots(5, 1, figsize=(15, 12), sharex=True, sharey=True)
fig.suptitle('Comparison of Stationary/Background Separation Methods')

for ax, (title, waveform) in zip(axs, background_panels):
    ax.plot(waveform)
    ax.set_title(title)
    ax.set_ylabel('Amplitude')

# Only the bottom panel carries the x-axis label.
axs[4].set_xlabel('Sample')

plt.tight_layout()
plt.show()
In [ ]: